#!/usr/bin/env bash
# Launch the vLLM OpenAI-compatible API server module
# (vllm.entrypoints.openai.api_server) for a locally stored model.
#
# Usage:
#   <this script> [extra api_server arguments...]
#
# Any extra command-line arguments are appended verbatim after the defaults.
#
# Optional environment overrides (the defaults reproduce the original,
# hard-coded invocation):
#   SERVED_MODEL_NAME        value for --served-model-name
#   MODEL_PATH               value for --model (model directory on disk)
#   GPU_MEMORY_UTILIZATION   value for --gpu-memory-utilization
#   TENSOR_PARALLEL_SIZE     value for --tensor-parallel-size
set -euo pipefail

readonly served_model_name="${SERVED_MODEL_NAME:-Qwen1_5-72B-Chat}"
readonly model_path="${MODEL_PATH:-/share/project/lijijie/tools/transfer_hf/Qwen1___5-72B-Chat}"
readonly gpu_memory_utilization="${GPU_MEMORY_UTILIZATION:-0.9}"
readonly tensor_parallel_size="${TENSOR_PARALLEL_SIZE:-8}"

# Build the argument list as an array so values containing spaces survive
# word-splitting intact.
server_args=(
  --served-model-name "${served_model_name}"
  --model "${model_path}"
  # Hyphenated to match the other flags. Plain argparse does not treat '_'
  # and '-' as interchangeable, so the underscore spelling only works with a
  # parser that normalizes it.
  --gpu-memory-utilization "${gpu_memory_utilization}"
  --tensor-parallel-size "${tensor_parallel_size}"
  # NOTE(review): newer vLLM releases reportedly replace this flag with
  # `--distributed-executor-backend ray` -- confirm against the installed
  # vLLM version before changing it.
  --worker-use-ray
)

# exec replaces this shell with the server process, so signals sent to the
# script's PID (e.g. SIGTERM from a job scheduler) reach the server directly
# and the server's exit status becomes the script's exit status.
exec python -m vllm.entrypoints.openai.api_server "${server_args[@]}" "$@"
